Użyte biblioteki

library(dplyr)
library(plotly)
library(ggplot2)
library(readxl)
library(tidyr)
library(knitr)
library(kableExtra)
## Warning: pakiet 'kableExtra' został zbudowany w wersji R 4.1.2
library(TTR)
## Warning: pakiet 'TTR' został zbudowany w wersji R 4.1.2
library(data.table)

Gold Prices

Wczytywanie i czyszczenie danych:

  • nadanie właściwych typów danych,
  • pozostawienie do dalszych rozważań jedynie ceny w walucie USD,
  • zmiana nazw kolumn dla AM and PM fixing,
Gold.prices <- read.csv("C:/Users/alili/Desktop/studia/9 semestr/ZED/projekt/Data pack/Gold prices.csv", 
                        colClasses = c(rep("Date", 1),
                                       rep("numeric", 2),
                                       rep("NULL", 4)),
                        col.names = c('Date',
                                      'Morning.Fix.USD',
                                      'Afternoon.Fix.USD',
                                      rep("NULL", 4)),
                        header = TRUE)
  • uzupełnijnie brakujących wartości (NA) w kolumnach z ceną otwarcia i zamknięcia, wartością z sąsiedniej kolumny (w całym zbiorze nie ma sytuacji gdzie brakują obydwie)
  • dodanie kolumny USD z uśrednioną ceną otwarcia i zamknięcia
Gold.prices <- Gold.prices %>% 
  mutate(Morning.Fix.USD = coalesce(Morning.Fix.USD, Afternoon.Fix.USD),
         Afternoon.Fix.USD = coalesce(Afternoon.Fix.USD, Morning.Fix.USD),
         USD = (Morning.Fix.USD + Afternoon.Fix.USD) / 2)
  • utworzenie zbioru ze średnią roczną ceną złota
Gold.prices.yearly <- Gold.prices %>%
  mutate(Year = as.numeric(substr(Date, 1, 4))) %>%
  group_by(Year) %>% 
  summarize( USD = (mean(Morning.Fix.USD) + mean(Afternoon.Fix.USD)) / 2)

Podsumowanie danych:

Zbiór zawiera codzienne wyceny złota podczas sesji otwarcia i zamknięcia od dnia 1968-01-02 do 2021-09-29.

head(Gold.prices)
##         Date Morning.Fix.USD Afternoon.Fix.USD      USD
## 1 2021-09-29         1741.65           1737.15 1739.400
## 2 2021-09-28         1739.65           1733.75 1736.700
## 3 2021-09-27         1749.15           1755.30 1752.225
## 4 2021-09-24         1755.15           1746.80 1750.975
## 5 2021-09-23         1771.05           1750.00 1760.525
## 6 2021-09-22         1775.35           1773.40 1774.375
nrow(Gold.prices)
## [1] 13585
summary(Gold.prices)
##       Date            Morning.Fix.USD   Afternoon.Fix.USD      USD         
##  Min.   :1968-01-02   Min.   :  34.77   Min.   :  34.75   Min.   :  34.76  
##  1st Qu.:1981-06-10   1st Qu.: 280.50   1st Qu.: 280.30   1st Qu.: 280.27  
##  Median :1994-11-14   Median : 383.30   Median : 383.45   Median : 383.38  
##  Mean   :1994-11-16   Mean   : 575.17   Mean   : 574.98   Mean   : 575.07  
##  3rd Qu.:2008-04-23   3rd Qu.: 841.75   3rd Qu.: 838.25   3rd Qu.: 841.00  
##  Max.   :2021-09-29   Max.   :2061.50   Max.   :2067.15   Max.   :2058.15

Analiza wartości ceny złota

Na wykresie widzimy zmianę cen otwarcia w czasie.

ggplot(Gold.prices, aes(x = Date)) +
  geom_point(aes(y = Morning.Fix.USD), color = "gold")


World development indicators

Wczytywanie zbioru danych

World_Development_Indicators <- read_excel("C:/Users/alili/Desktop/studia/9 semestr/ZED/projekt/Data pack/World_Development_Indicators.xlsx", 
                                           na = '..', 
                                           range = "A1:BC44305")

Czyszczenie zbioru:

  • usunięcie elementów innych niż kraje z kolumny Country Name, pozostawienie całego swiata w zbiorze krajów,
World_Development_Indicators <- World_Development_Indicators %>%
  filter(!`Country Name` %in% c("Low & middle income","Low income","Lower middle income","Middle income","Upper middle income","High income"))
  • wyodrębnienie kodów serii do osobnej tabeli oraz usunięcie jej z przetwarzanego zbioru,
  • przeniesienie zmiennych do osobnych kolumn,
World_Development_Indicators.Series_Codes <- select(World_Development_Indicators, `Series Name`, `Series Code`)
World_Development_Indicators <- select(World_Development_Indicators, -`Series Code`)
  • przeniesienie pojedyńczych obserwacji do osobnych wierszy
World_Development_Indicators <- World_Development_Indicators %>%
  pivot_longer(cols = `1970 [YR1970]`:`2020 [YR2020]`, names_to = "Year") %>%
  group_by(`Series Name`) %>%
  mutate(row = row_number()) %>%
  tidyr::pivot_wider(names_from = `Series Name`, values_from = value) %>%
  select(-row)
  • poprawa kolumny Year: wyodrębnienie lat jako numerycznych wartości
World_Development_Indicators <- World_Development_Indicators %>%
  mutate(Year = as.numeric(substr(Year, 1, 4)))
  • pozostawiam 100 atrybutów, które mają najwięcej danych (najmniej wartości pustych) - nie jest to idealny sposób, ponieważ część państw może nie podawać w ogóle danych na różne wskaźniki,
World_Development_Indicators %>%
  select(-(1:3)) %>%
  summarise_all(funs(sum(is.na(.)))) %>%
  t %>%
  data.frame() %>%
  top_n(-100)
  

Podsumowanie danych:

Zbiór zawiera pierdyliard wskaźników.

Lista krajów dostępnych w zbiorze
Afghanistan
Albania
Algeria
American Samoa
Andorra
Angola
Antigua and Barbuda
Argentina
Armenia
Aruba
Australia
Austria
Azerbaijan
Bahamas, The
Bahrain
Bangladesh
Barbados
Belarus
Belgium
Belize
Benin
Bermuda
Bhutan
Bolivia
Brazil
British Virgin Islands
Bulgaria
Burundi
Cambodia
Cameroon
Canada
Cayman Islands
Central African Republic
Chad
Channel Islands
Chile
China
Colombia
Comoros
Congo, Dem. Rep. 
Congo, Rep. 
Costa Rica
Croatia
Cuba
Curacao
Cyprus
Czech Republic
Denmark
Djibouti
Dominica
Dominican Republic
Ecuador
Egypt, Arab Rep. 
El Salvador
Equatorial Guinea
Eritrea
Estonia
Eswatini
Ethiopia
Faroe Islands
Fiji
Finland
France
French Polynesia
Gabon
Gambia, The
Georgia
Germany
Ghana
Gibraltar
Greece
Greenland
Grenada
Guam
Guatemala
Guinea
Guinea-Bissau
Guyana
Haiti
Honduras
Hong Kong SAR, China
Hungary
Iceland
India
Indonesia
Iran, Islamic Rep. 
Iraq
Ireland
Isle of Man
Israel
Italy
Jamaica
Japan
Jordan
Kazakhstan
Kenya
Kiribati
Korea, Dem. People’s Rep. 
Korea, Rep. 
Kosovo
Kuwait
Kyrgyz Republic
Lao PDR
Latvia
Lebanon
Lesotho
Liberia
Libya
Liechtenstein
Lithuania
Luxembourg
Macao SAR, China
Madagascar
Malawi
Malaysia
Maldives
Mali
Malta
Marshall Islands
Mauritania
Mauritius
Mexico
Micronesia, Fed. Sts.
Moldova
Monaco
Mongolia
Montenegro
Morocco
Mozambique
Myanmar
Namibia
Nepal
Netherlands
New Caledonia
New Zealand
Nicaragua
Niger
Nigeria
North Macedonia
Norway
Oman
Pakistan
Panama
Papua New Guinea
Paraguay
Peru
Philippines
Poland
Portugal
Puerto Rico
Qatar
Romania
Russian Federation
Rwanda
San Marino
Sao Tome and Principe
Saudi Arabia
Senegal
Serbia
Seychelles
Sierra Leone
Singapore
Sint Maarten (Dutch part)
Slovak Republic
Slovenia
Solomon Islands
South Africa
South Sudan
Spain
St. Vincent and the Grenadines
Sudan
Suriname
Sweden
Switzerland
Syrian Arab Republic
Tajikistan
Tanzania
Thailand
Togo
Tonga
Trinidad and Tobago
Tunisia
Turkey
Turks and Caicos Islands
Tuvalu
Uganda
Ukraine
United Arab Emirates
United Kingdom
United States
Uruguay
Uzbekistan
Vanuatu
Venezuela, RB
Vietnam
Virgin Islands (U.S.)
West Bank and Gaza
Yemen, Rep. 
Zambia
Zimbabwe
Bosnia and Herzegovina
World
Lista dostępnych wskaźników
Country Name
Country Code
Year
Urban population growth (annual %)
Urban population (% of total population)
Value lost due to electrical outages (% of sales for affected firms)
Urban population
Urban land area (sq. km)
Unemployment, total (% of total labor force) (national estimate)
Unemployment with advanced education (% of total labor force with advanced education)
Transport services (% of commercial service exports)
Transport services (% of commercial service imports)
Trained teachers in upper secondary education (% of total teachers)
Trained teachers in secondary education (% of total teachers)
Trained teachers in primary education (% of total teachers)
Trademark applications, direct nonresident
Trade in services (% of GDP)
Trade (% of GDP)
Trademark applications, direct resident
Trademark applications, total
Total natural resources rents (% of GDP)
Total greenhouse gas emissions (kt of CO2 equivalent)
Total greenhouse gas emissions (% change from 1990)
Total alcohol consumption per capita (liters of pure alcohol, projected estimates, 15+ years of age)
Time required to build a warehouse (days)
Time required to enforce a contract (days)
Time required to get electricity (days)
Taxes on goods and services (current LCU)
Taxes on income, profits and capital gains (% of revenue)
Taxes on income, profits and capital gains (% of total taxes)
Taxes on income, profits and capital gains (current LCU)
Taxes on international trade (% of revenue)
Taxes on international trade (current LCU)
Taxes on goods and services (% value added of industry and services)
Taxes on goods and services (% of revenue)
Taxes on exports (current LCU)
Taxes on exports (% of tax revenue)
Taxes less subsidies on products (current US\() </td> </tr> <tr> <td style="text-align:left;"> Taxes less subsidies on products (current LCU) </td> </tr> <tr> <td style="text-align:left;"> Taxes less subsidies on products (constant LCU) </td> </tr> <tr> <td style="text-align:left;"> Tax revenue (current LCU) </td> </tr> <tr> <td style="text-align:left;"> Tax revenue (% of GDP) </td> </tr> <tr> <td style="text-align:left;"> Tax payments (number) </td> </tr> <tr> <td style="text-align:left;"> Survival to age 65, female (% of cohort) </td> </tr> <tr> <td style="text-align:left;"> Survival to age 65, male (% of cohort) </td> </tr> <tr> <td style="text-align:left;"> Suicide mortality rate (per 100,000 population) </td> </tr> <tr> <td style="text-align:left;"> Suicide mortality rate, female (per 100,000 female population) </td> </tr> <tr> <td style="text-align:left;"> Suicide mortality rate, male (per 100,000 male population) </td> </tr> <tr> <td style="text-align:left;"> Stocks traded, turnover ratio of domestic shares (%) </td> </tr> <tr> <td style="text-align:left;"> Stocks traded, total value (current US\))
Stocks traded, total value (% of GDP)
Strength of legal rights index (0=weak to 12=strong)
Short-term debt (% of total reserves)
Short-term debt (% of total external debt)
Short-term debt (% of exports of goods, services and primary income)
Share of youth not in education, employment or training, female (% of female youth population)
Share of youth not in education, employment or training, male (% of male youth population)
Share of youth not in education, employment or training, total (% of youth population)
Services, value added (% of GDP)
Service imports (BoP, current US\() </td> </tr> <tr> <td style="text-align:left;"> Service exports (BoP, current US\))
Self-employed, male (% of male employment) (modeled ILO estimate)
Self-employed, total (% of total employment) (modeled ILO estimate)
Self-employed, female (% of female employment) (modeled ILO estimate)
Secure Internet servers
Secure Internet servers (per 1 million people)
Secondary education, teachers
Secondary education, pupils
Scientific and technical journal articles
School enrollment, tertiary (gross), gender parity index (GPI)
S&P Global Equity Indices (annual % change)
Rural population growth (annual %)
Rural population (% of total population)
Rural population
Researchers in R&D (per million people)
Research and development expenditure (% of GDP)
Renewable energy consumption (% of total final energy consumption)
Renewable internal freshwater resources per capita (cubic meters)
Renewable internal freshwater resources, total (billion cubic meters)
Renewable electricity output (% of total electricity output)
Real interest rate (%)
Pupil-teacher ratio, upper secondary
Pupil-teacher ratio, tertiary
Pupil-teacher ratio, secondary
Pupil-teacher ratio, primary
Pupil-teacher ratio, preprimary
Rail lines (total route-km)
Railways, goods transported (million ton-km)
Railways, passengers carried (million passenger-km)
Proportion of seats held by women in national parliaments (%)
Primary income payments (BoP, current US\() </td> </tr> <tr> <td style="text-align:left;"> Primary income receipts (BoP, current US\))
Primary school starting age (years)
Prevalence of undernourishment (% of population)
Portfolio investment, net (BoP, current US\() </td> </tr> <tr> <td style="text-align:left;"> Portfolio investment, bonds (PPG + PNG) (NFL, current US\))
Portfolio equity, net inflows (BoP, current US\() </td> </tr> <tr> <td style="text-align:left;"> Population, total </td> </tr> <tr> <td style="text-align:left;"> Population, male </td> </tr> <tr> <td style="text-align:left;"> Population, male (% of total population) </td> </tr> <tr> <td style="text-align:left;"> Population, female (% of total population) </td> </tr> <tr> <td style="text-align:left;"> Population, female </td> </tr> <tr> <td style="text-align:left;"> Population living in slums (% of urban population) </td> </tr> <tr> <td style="text-align:left;"> Population in urban agglomerations of more than 1 million </td> </tr> <tr> <td style="text-align:left;"> Population in the largest city (% of urban population) </td> </tr> <tr> <td style="text-align:left;"> Population in largest city </td> </tr> <tr> <td style="text-align:left;"> Population growth (annual %) </td> </tr> <tr> <td style="text-align:left;"> Population density (people per sq. km of land area) </td> </tr> <tr> <td style="text-align:left;"> Population ages 65 and above (% of total population) </td> </tr> <tr> <td style="text-align:left;"> Population ages 15-64 (% of total population) </td> </tr> <tr> <td style="text-align:left;"> Population ages 0-14 (% of total population) </td> </tr> <tr> <td style="text-align:left;"> PM2.5 air pollution, mean annual exposure (micrograms per cubic meter) </td> </tr> <tr> <td style="text-align:left;"> PM2.5 air pollution, population exposed to levels exceeding WHO guideline value (% of total) </td> </tr> <tr> <td style="text-align:left;"> PM2.5 pollution, population exposed to levels exceeding WHO Interim Target-1 value (% of total) </td> </tr> <tr> <td style="text-align:left;"> PM2.5 pollution, population exposed to levels exceeding WHO Interim Target-2 value (% of total) </td> </tr> <tr> <td style="text-align:left;"> PM2.5 pollution, population exposed to levels exceeding WHO Interim Target-3 value (% of total) </td> </tr> <tr> <td style="text-align:left;"> Part time employment, total (% of total employment) </td> </tr> <tr> <td style="text-align:left;"> Patent applications, nonresidents </td> </tr> <tr> <td style="text-align:left;"> Patent applications, residents </td> </tr> <tr> <td style="text-align:left;"> Number of under-five deaths </td> </tr> <tr> <td style="text-align:left;"> Nitrous oxide emissions (% change from 1990) </td> </tr> <tr> <td style="text-align:left;"> Nitrous oxide emissions (thousand metric tons of CO2 equivalent) </td> </tr> <tr> <td style="text-align:left;"> Nitrous oxide emissions in energy sector (% of total) </td> </tr> <tr> <td style="text-align:left;"> Net primary income (Net income from abroad) (current US\))
Net primary income (Net income from abroad) (current LCU)
Net primary income (Net income from abroad) (constant LCU)
Net primary income (BoP, current US\() </td> </tr> <tr> <td style="text-align:left;"> Net official development assistance received (current US\))
Net official aid received (current US\() </td> </tr> <tr> <td style="text-align:left;"> Net domestic credit (current LCU) </td> </tr> <tr> <td style="text-align:left;"> Net acquisition of financial assets (% of GDP) </td> </tr> <tr> <td style="text-align:left;"> Natural gas rents (% of GDP) </td> </tr> <tr> <td style="text-align:left;"> Mortality rate, infant (per 1,000 live births) </td> </tr> <tr> <td style="text-align:left;"> Mortality caused by road traffic injury (per 100,000 population) </td> </tr> <tr> <td style="text-align:left;"> Methane emissions (% change from 1990) </td> </tr> <tr> <td style="text-align:left;"> Methane emissions (kt of CO2 equivalent) </td> </tr> <tr> <td style="text-align:left;"> Methane emissions in energy sector (thousand metric tons of CO2 equivalent) </td> </tr> <tr> <td style="text-align:left;"> Merchandise exports to high-income economies (% of total merchandise exports) </td> </tr> <tr> <td style="text-align:left;"> Manufacturing, value added (% of GDP) </td> </tr> <tr> <td style="text-align:left;"> Literacy rate, adult total (% of people ages 15 and above) </td> </tr> <tr> <td style="text-align:left;"> Life expectancy at birth, total (years) </td> </tr> <tr> <td style="text-align:left;"> Lending interest rate (%) </td> </tr> <tr> <td style="text-align:left;"> Land area (sq. km) </td> </tr> <tr> <td style="text-align:left;"> Labor force, total </td> </tr> <tr> <td style="text-align:left;"> International tourism, expenditures (current US\))
International migrant stock (% of population)
Interest payments (% of expense)
Inflation, consumer prices (annual %)
Individuals using the Internet (% of population)
Income share held by highest 10%
Imports of goods and services (current US\() </td> </tr> <tr> <td style="text-align:left;"> Imports of goods and services (% of GDP) </td> </tr> <tr> <td style="text-align:left;"> ICT goods exports (% of total goods exports) </td> </tr> <tr> <td style="text-align:left;"> Gross savings (% of GDP) </td> </tr> <tr> <td style="text-align:left;"> Gross national expenditure (% of GDP) </td> </tr> <tr> <td style="text-align:left;"> Gross national expenditure (current US\))
Gross savings (current US\() </td> </tr> <tr> <td style="text-align:left;"> Gross domestic savings (% of GDP) </td> </tr> <tr> <td style="text-align:left;"> Gross domestic savings (current US\))
Government expenditure on education, total (% of GDP)
Goods exports (BoP, current US\() </td> </tr> <tr> <td style="text-align:left;"> Goods imports (BoP, current US\))
GNI growth (annual %)
GDP per capita (current US\() </td> </tr> <tr> <td style="text-align:left;"> GDP per capita growth (annual %) </td> </tr> <tr> <td style="text-align:left;"> GDP growth (annual %) </td> </tr> <tr> <td style="text-align:left;"> GDP (current US\))
Fuel exports (% of merchandise exports)
Fuel imports (% of merchandise imports)
Food exports (% of merchandise exports)
Food imports (% of merchandise imports)
External debt stocks (% of GNI)
Exports of goods and services (current US\() </td> </tr> <tr> <td style="text-align:left;"> Exports of goods and services (annual % growth) </td> </tr> <tr> <td style="text-align:left;"> Expense (% of GDP) </td> </tr> <tr> <td style="text-align:left;"> Employment in industry (% of total employment) (modeled ILO estimate) </td> </tr> <tr> <td style="text-align:left;"> Employment in services (% of total employment) (modeled ILO estimate) </td> </tr> <tr> <td style="text-align:left;"> Employment in agriculture (% of total employment) (modeled ILO estimate) </td> </tr> <tr> <td style="text-align:left;"> Employers, total (% of total employment) (modeled ILO estimate) </td> </tr> <tr> <td style="text-align:left;"> Electricity production from renewable sources, excluding hydroelectric (kWh) </td> </tr> <tr> <td style="text-align:left;"> Electricity production from renewable sources, excluding hydroelectric (% of total) </td> </tr> <tr> <td style="text-align:left;"> Electricity production from oil, gas and coal sources (% of total) </td> </tr> <tr> <td style="text-align:left;"> Electricity production from coal sources (% of total) </td> </tr> <tr> <td style="text-align:left;"> Electricity production from hydroelectric sources (% of total) </td> </tr> <tr> <td style="text-align:left;"> Electricity production from natural gas sources (% of total) </td> </tr> <tr> <td style="text-align:left;"> Electricity production from nuclear sources (% of total) </td> </tr> <tr> <td style="text-align:left;"> Ease of doing business score (0 = lowest performance to 100 = best performance) </td> </tr> <tr> <td style="text-align:left;"> Diabetes prevalence (% of population ages 20 to 79) </td> </tr> <tr> <td style="text-align:left;"> Deposit interest rate (%) </td> </tr> <tr> <td style="text-align:left;"> Current health expenditure per capita (current US\))
Current health expenditure (% of GDP)
Consumer price index (2010 = 100)
CO2 emissions from solid fuel consumption (% of total)
CO2 emissions from solid fuel consumption (kt)
CO2 emissions from transport (% of total fuel combustion)
CO2 intensity (kg per kg of oil equivalent energy use)
CO2 emissions from residential buildings and commercial and public services (% of total fuel combustion)
CO2 emissions from other sectors, excluding residential buildings and commercial and public services (% of total fuel combustion)
CO2 emissions from manufacturing industries and construction (% of total fuel combustion)
CO2 emissions from liquid fuel consumption (kt)
CO2 emissions from liquid fuel consumption (% of total)
CO2 emissions from gaseous fuel consumption (kt)
CO2 emissions from gaseous fuel consumption (% of total)
CO2 emissions from electricity and heat production, total (% of total fuel combustion)
CO2 emissions (metric tons per capita)
CO2 emissions (kt)
CO2 emissions (kg per PPP $ of GDP)
CO2 emissions (kg per 2017 PPP $ of GDP)
CO2 emissions (kg per 2010 US$ of GDP)
Birth rate, crude (per 1,000 people)
Bank capital to assets ratio (%)
Average number of visits or required meetings with tax officials (for affected firms)
Average precipitation in depth (mm per year)
Automated teller machines (ATMs) (per 100,000 adults)
Account ownership at a financial institution or with a mobile-money-service provider (% of population ages 15+)
Access to electricity (% of population)

S&P Composite

Wczytywanie danych

SP.Composite <- read.csv("C:/Users/alili/Desktop/studia/9 semestr/ZED/projekt/Data pack/S&P Composite.csv")

Czyszczenie danych:

  • zmiana nazwy oraz typ na właściwy kolumny z datą
  • zmiana nazwy Cyclicaly.Adjusted.PE.Ratio na skrót CAPE -
SP.Composite <- SP.Composite %>%
  rename(Date = Year,
         CAPE = Cyclically.Adjusted.PE.Ratio) %>%
  mutate(Date = as.Date(Date))

Znaczenie atrybutów:

  • S.P.Composite - nominalna wartość indexu,
  • Divident - nominalna dywidenta,
  • Earnings - nominalne zarobki na indeksie,
  • CPI - wskaźnik cen towarów i usług konsumpcyjnych. Najpopularniejsza na świecie miara inflacji/deflacji,
  • Long.Interest.Rate - stopy procentowe dziesięcioletnich obligacji rządowych,
  • Real.Price - realna wartość indexu,
  • Real.Divident - realna dywidenta,
  • Real.Earnings - realne zarobki na indeksie,
  • CAPE (Cyclicaly.Adjusted.PE.Ratio) - cyklicznie dostosowywany wskaźnik ceny do zysków. Definiuje się go jako cenę podzieloną przez średnią z dziesięciu lat zarobków, skorygowaną o inflację.

Podsumowanie danych

head(SP.Composite)
##         Date S.P.Composite Dividend Earnings      CPI Long.Interest.Rate
## 1 2021-10-31      3700.650       NA       NA 260.1097               0.93
## 2 2021-09-30      4493.280       NA       NA 273.9832               1.29
## 3 2021-08-31      4454.206       NA       NA 273.6565               1.28
## 4 2021-07-31      4363.713       NA       NA 273.0030               1.32
## 5 2021-06-30      4238.490 57.86504   158.74 271.6960               1.52
## 6 2021-05-31      4167.850 57.78782   148.56 269.1950               1.62
##   Real.Price Real.Dividend Real.Earnings     CAPE
## 1   3700.650            NA            NA 33.73946
## 2   4477.204            NA            NA 38.34228
## 3   4443.570            NA            NA 38.09043
## 4   4363.713            NA            NA 37.44349
## 5   4258.879      58.14340      159.5036 36.69631
## 6   4226.807      58.60528      150.6615 36.55215
ncol(SP.Composite)
## [1] 10
nrow(SP.Composite)
## [1] 1810
summary(SP.Composite)
##       Date            S.P.Composite         Dividend          Earnings       
##  Min.   :1871-01-31   Min.   :   2.730   Min.   : 0.1800   Min.   :  0.1600  
##  1st Qu.:1908-10-07   1st Qu.:   7.902   1st Qu.: 0.4202   1st Qu.:  0.5608  
##  Median :1946-06-15   Median :  17.370   Median : 0.8717   Median :  1.4625  
##  Mean   :1946-06-15   Mean   : 327.968   Mean   : 6.7321   Mean   : 15.3714  
##  3rd Qu.:1984-02-21   3rd Qu.: 164.400   3rd Qu.: 7.0525   3rd Qu.: 14.7258  
##  Max.   :2021-10-31   Max.   :4493.280   Max.   :59.6800   Max.   :158.7400  
##                                          NA's   :4         NA's   :4         
##       CPI         Long.Interest.Rate   Real.Price     Real.Dividend   
##  Min.   :  6.28   Min.   : 0.620     Min.   :  73.9   Min.   : 5.445  
##  1st Qu.: 10.20   1st Qu.: 3.171     1st Qu.: 186.6   1st Qu.: 9.417  
##  Median : 20.35   Median : 3.815     Median : 283.3   Median :14.411  
##  Mean   : 62.39   Mean   : 4.504     Mean   : 622.0   Mean   :17.498  
##  3rd Qu.:102.28   3rd Qu.: 5.139     3rd Qu.: 707.0   3rd Qu.:22.301  
##  Max.   :273.98   Max.   :15.320     Max.   :4477.2   Max.   :63.511  
##                                                       NA's   :4       
##  Real.Earnings          CAPE       
##  Min.   :  4.576   Min.   : 4.784  
##  1st Qu.: 14.063   1st Qu.:11.898  
##  Median : 23.524   Median :16.381  
##  Mean   : 34.907   Mean   :17.215  
##  3rd Qu.: 43.768   3rd Qu.:20.913  
##  Max.   :159.504   Max.   :44.198  
##  NA's   :4         NA's   :120

Currency Exchange Rates

Wczytywanie danych:

Currency.Exchange.Rates <- read.csv("C:/Users/alili/Desktop/studia/9 semestr/ZED/projekt/Data pack/CurrencyExchangeRates.csv")

Czyszczenie danych:

  • zmiana typu kolumny z datą
Currency.Exchange.Rates <- Currency.Exchange.Rates %>% 
  mutate(Date = as.Date(Date))

Podsumowanie danych:

Zbiór zawiera codzienny kurs wymiany walut od dnia 1995-01-02 do 2018-05-02.

Currency.Exchange.Rates %>%
  names %>%
  data.frame() %>%
  rename('Dostępne waluty' = '.') %>%
  kable %>%
  kable_styling("striped", full_width = F) %>%
  scroll_box(width = '400px', height = '400px')
Dostępne waluty
Date
Algerian.Dinar
Australian.Dollar
Bahrain.Dinar
Bolivar.Fuerte
Botswana.Pula
Brazilian.Real
Brunei.Dollar
Canadian.Dollar
Chilean.Peso
Chinese.Yuan
Colombian.Peso
Czech.Koruna
Danish.Krone
Euro
Hungarian.Forint
Icelandic.Krona
Indian.Rupee
Indonesian.Rupiah
Iranian.Rial
Israeli.New.Sheqel
Japanese.Yen
Kazakhstani.Tenge
Korean.Won
Kuwaiti.Dinar
Libyan.Dinar
Malaysian.Ringgit
Mauritian.Rupee
Mexican.Peso
Nepalese.Rupee
New.Zealand.Dollar
Norwegian.Krone
Nuevo.Sol
Pakistani.Rupee
Peso.Uruguayo
Philippine.Peso
Polish.Zloty
Qatar.Riyal
Rial.Omani
Russian.Ruble
Saudi.Arabian.Riyal
Singapore.Dollar
South.African.Rand
Sri.Lanka.Rupee
Swedish.Krona
Swiss.Franc
Thai.Baht
Trinidad.And.Tobago.Dollar
Tunisian.Dinar
U.A.E..Dirham
U.K..Pound.Sterling
U.S..Dollar

Bitcoin dataset

Wczytywanie danych:

Bitcoin.prices <- read.csv("C:/Users/alili/Desktop/studia/9 semestr/ZED/projekt/Data pack/Bitcoin/BCHAIN-MKPRU.csv",
                           colClasses = c(rep("Date", 1),
                                       rep("numeric", 1)),
                           col.names = c('Date',
                                      'USD'))

Bitcoin.trade.volume <- read.csv("C:/Users/alili/Desktop/studia/9 semestr/ZED/projekt/Data pack/Bitcoin/BCHAIN-TRVOU.csv",
                           colClasses = c(rep("Date", 1),
                                       rep("numeric", 1)),
                           col.names = c('Date',
                                      'Trade Volume'))

Bitcoin.mine.difficulty <- read.csv("C:/Users/alili/Desktop/studia/9 semestr/ZED/projekt/Data pack/Bitcoin/BCHAIN-DIFF.csv",
                           colClasses = c(rep("Date", 1),
                                       rep("numeric", 1)),
                           col.names = c('Date',
                                      'Mine difficulty'))

Bitcoin.hash.rate <- read.csv("C:/Users/alili/Desktop/studia/9 semestr/ZED/projekt/Data pack/Bitcoin/BCHAIN-HRATE.csv",
                           colClasses = c(rep("Date", 1),
                                       rep("numeric", 1)),
                           col.names = c('Date',
                                      'Hash rate'))
Bitcoin <- Bitcoin.prices %>%
  merge(Bitcoin.trade.volume, by = "Date") %>%
  merge(Bitcoin.mine.difficulty, by = "Date") %>%
  merge(Bitcoin.hash.rate, by = "Date")

Podsumowanie danych:

Zestaw danych zawiera codzienne informacje od początku istnienia Bitcoina.

Znaczenie atrybutów: * USD - Bitcoin Market Price USD,Average USD market price across major bitcoin exchanges. * Trade volume - Bitcoin USD Exchange Trade Volume,The total USD value of trading volume on major bitcoin exchanges. * Mine difficulty - Bitcoin Difficulty,A relative measure of how difficult it is to find a new block. The difficulty is adjusted periodically as a function of how much hashing power has been deployed by the network of miners. * Hash rate - Bitcoin Hash Rate,The estimated number of tera hashes per second (trillions of hashes per second) the Bitcoin network is performing.

tail(Bitcoin)
##            Date      USD Trade.Volume Mine.difficulty Hash.rate
## 4654 2021-09-30 41522.38    221224597    1.899764e+13 161488615
## 4655 2021-10-01 43757.81    360342502    1.899764e+13 132212901
## 4656 2021-10-02 48140.11    688291407    1.899764e+13 177543039
## 4657 2021-10-03 47727.10    184243788    1.899764e+13 141656680
## 4658 2021-10-04 48205.72    183312374    1.900912e+13 147411968
## 4659 2021-10-05 49143.95    370887916    1.989305e+13 162177736
summary(Bitcoin)
##       Date                 USD            Trade.Volume      
##  Min.   :2009-01-03   Min.   :    0.00   Min.   :0.000e+00  
##  1st Qu.:2012-03-12   1st Qu.:    7.21   1st Qu.:1.948e+05  
##  Median :2015-05-21   Median :  431.89   Median :6.824e+06  
##  Mean   :2015-05-21   Mean   : 5132.38   Mean   :1.467e+08  
##  3rd Qu.:2018-07-28   3rd Qu.: 6496.35   3rd Qu.:1.484e+08  
##  Max.   :2021-10-05   Max.   :63554.44   Max.   :5.352e+09  
##  Mine.difficulty       Hash.rate        
##  Min.   :0.000e+00   Min.   :        0  
##  1st Qu.:1.689e+06   1st Qu.:       12  
##  Median :4.881e+10   Median :   356089  
##  Mean   :3.665e+12   Mean   : 26458258  
##  3rd Qu.:5.364e+12   3rd Qu.: 38265984  
##  Max.   :2.505e+13   Max.   :198514006


Analizy:


Animowany wykres zmiany liczby ludności w czasie (Odpalić z puszczoną w tle piosenką Queen “Another One Bites the Dust”):

World.Population.Top10 
## # A tibble: 561 x 5
## # Groups:   Year [51]
##    `Country Name`      Year Population `Population in mln`  Rank
##    <chr>              <dbl>      <dbl>               <dbl> <dbl>
##  1 China               1970  818315000               818.     11
##  2 India               1970  555189797               555.     10
##  3 United States       1970  205052000               205.      9
##  4 Russian Federation  1970  130404000               130.      8
##  5 Indonesia           1970  114793179               115.      7
##  6 Japan               1970  103403000               103.      6
##  7 Brazil              1970   95113265                95.1     5
##  8 Germany             1970   78169289                78.2     4
##  9 Bangladesh          1970   64232486                64.2     3
## 10 Pakistan            1970   58142062                58.1     2
## # ... with 551 more rows

Samobójstwa kobiet i mężczyzn na świecie w latach 2000-2019:

Wniosek: Liczba samobójstw jest w trendzie spadkowym, zdrowie psychiczne ludzkości się polepsza.


Korelacja ceny złota do ceny BTC:

gold <- Gold.prices %>%
  select(Date, USD) %>%
  arrange(desc(row_number())) %>%
  filter( Date < '2021-09-29') %>%
  filter( Date > '2016-01-01') %>%
  rename( USD.gold = USD )

BTC_price <- Bitcoin.prices %>%
  arrange(desc(row_number())) %>%
  filter( Date < '2021-09-29') %>%
  filter( Date > '2016-01-01') %>%
  rename( USD.BTC = USD )

gold.btc <- gold %>%
  inner_join(BTC_price, by = 'Date')


cor.gold.btc <- cor(gold.btc$USD.gold, gold.btc$USD.BTC, use = "complete.obs")

coeff.gold.btc <- max(gold.btc$USD.BTC)/max(gold.btc$USD.gold)

gold.btc %>%
  ggplot( aes(x = Date) ) +
  geom_line( aes(y = USD.gold), color = 'gold' ) +
  geom_line( aes(y = USD.BTC/coeff.gold.btc), color = 'orange' ) + 
  scale_y_continuous(
    name = "Cena złota",
    sec.axis = sec_axis(~.*coeff.gold.btc, name="Cena BTC")
  ) +
  ggtitle("Cena złota oraz BTC [USD]") +
  theme_bw() + 
  theme(
    axis.title.y = element_text(color = 'gold', size=13),
    axis.title.y.right = element_text(color = 'orange', size=13),
    axis.text.y = element_text(color = 'gold', size=13),
    axis.text.y.right = element_text(color = 'orange', size=13)
  )

Wniosek: Pomimo korelacji na poziomie 0.68 wizualna inspekcja nie pokazuje jasnej zależności pomiędzy cenami porównywanych aktywów.

Korelacja złota oraz BTC z indexem S&P Composite

Gold.prices.monthly <- Gold.prices %>%
  mutate(Date = substr(Date, 1, 7)) %>%
  group_by(Date) %>% 
  summarize( USD = (mean(Morning.Fix.USD) + mean(Afternoon.Fix.USD)) / 2)

SP.df <- SP.Composite %>%
  mutate(Date = substr(Date, 1, 7)) %>%
  select(Date, S.P.Composite)

gold.SP <- Gold.prices.monthly %>%
  inner_join(SP.df, by = 'Date') %>%
  mutate(Date = as.Date(paste0(Date,'-01')))

cor(gold.SP$USD, gold.SP$S.P.Composite, use = "complete.obs")
## [1] 0.8181548
coeff.gold.SP <- max(gold.SP$S.P.Composite)/max(gold.SP$USD)

gold.SP %>%
  ggplot( aes(x = Date) ) +
  geom_line( aes(y = USD), color = 'gold' ) +
  geom_line( aes(y = S.P.Composite/coeff.gold.SP), color = 'royalblue' ) + 
  scale_y_continuous(
    name = "Cena złota",
    sec.axis = sec_axis(~.*coeff.gold.SP, name="Cena S&P Composite")
  ) +
  ggtitle("Cena złota oraz indeksu S&P Composite [USD]") +
  theme_bw() + 
  theme(
    axis.title.y = element_text(color = 'gold', size=13),
    axis.title.y.right = element_text(color = 'royalblue', size=13),
    axis.text.y = element_text(color = 'gold', size=13),
    axis.text.y.right = element_text(color = 'royalblue', size=13)
  )

Wniosek: Pomimo korelacji na poziomie 0.82 wizualna inspekcja nie pokazuje jasnej zależności pomiędzy cenami porównywanych aktywów.


Średnie kroczące na cenie złota

MA1 = 7
MA2 = 19

Gold.prices %>%
  arrange(desc(row_number())) %>%
  mutate(MA1 = SMA(USD, MA1),
         MA2 = SMA(USD, MA2)) %>%
  filter(!is.na(MA2))

Zbadanie najwyższych korelacji wskaźników rozwoju USA z ceną złota.

USA.WDI <- World_Development_Indicators %>%
  filter(`Country Name` == 'United States') %>%
  merge(Gold.prices.yearly, by = 'Year') %>%
  select_if(~ !any(is.na(.)))
  
USA.WDI.to_cor <- USA.WDI %>%
  select(-(1:3))

num_col=ncol(USA.WDI.to_cor[,-1])
out_indx <-  which(upper.tri(diag(num_col))) 
cor_cols <- USA.WDI.to_cor  %>%
            do(melt(cor(.[,-1], use="pairwise.complete.obs"), value.name="cor")[out_indx,])
## Warning in cor(.[, -1], use = "pairwise.complete.obs"): odchylenie standardowe
## wynosi zero
## Warning in melt(cor(.[, -1], use = "pairwise.complete.obs"), value.name =
## "cor"): The melt generic in data.table has been passed a matrix and will
## attempt to redirect to the relevant reshape2 method; please note that reshape2
## is deprecated, and this redirection is now deprecated as well. To continue
## using melt methods from reshape2 while both libraries are attached, e.g.
## melt.list, you can prepend the namespace like reshape2::melt(cor(.[, -1], use
## = "pairwise.complete.obs")). In the next version, this warning will become an
## error.
cor_cols <- cor_cols %>%
  filter(Var2 == 'USD') %>%
  top_n(10) %>%
  arrange(desc(cor)) %>%
  rename(top_cor = cor)
## Selecting by cor
cor_cols
##                                                    Var1 Var2   top_cor
## 1                 Net primary income (BoP, current US$)  USD 0.9375048
## 2                    Service exports (BoP, current US$)  USD 0.9017878
## 3                      Goods exports (BoP, current US$)  USD 0.8963826
## 4                     Net domestic credit (current LCU)  USD 0.8947741
## 5            Primary income receipts (BoP, current US$)  USD 0.8925628
## 6                    Service imports (BoP, current US$)  USD 0.8868864
## 7                      Goods imports (BoP, current US$)  USD 0.8723380
## 8                                     GDP (current US$)  USD 0.8627224
## 9            Primary income payments (BoP, current US$)  USD 0.8473236
## 10 Population ages 65 and above (% of total population)  USD 0.8421186
coeff.income_gold.price <- max(USA.WDI$`Net primary income (BoP, current US$)`)/max(USA.WDI$USD)

USA.WDI %>%
  ggplot( aes(x = Year) ) +
  geom_line( aes(y = USD), color = 'gold' ) +
  geom_line( aes(y = `Net primary income (BoP, current US$)`/coeff.income_gold.price), color = 'royalblue' ) + 
  scale_y_continuous(
    name = "Cena złota",
    sec.axis = sec_axis(~.*coeff.gold.SP, name="Net primary income")
  ) +
  ggtitle("Cena złota w porównaniu ze wskaźnikiem przychodu netto w USA") +
  theme_bw() + 
  theme(
    axis.title.y = element_text(color = 'gold', size=13),
    axis.title.y.right = element_text(color = 'royalblue', size=13),
    axis.text.y = element_text(color = 'gold', size=13),
    axis.text.y.right = element_text(color = 'royalblue', size=13)
  )

coeff.service_export.price <- max(USA.WDI$`Service exports (BoP, current US$)`)/max(USA.WDI$USD)

USA.WDI %>%
  ggplot( aes(x = Year) ) +
  geom_line( aes(y = USD), color = 'gold' ) +
  geom_line( aes(y = `Service exports (BoP, current US$)` / coeff.service_export.price), color = 'royalblue' ) + 
  scale_y_continuous(
    name = "Cena złota",
    sec.axis = sec_axis(~.*coeff.gold.SP, name="Service exports")
  ) +
  ggtitle("Cena złota w porównaniu ze wskaźnikiem eksportu usług w USA") +
  theme_bw() + 
  theme(
    axis.title.y = element_text(color = 'gold', size=13),
    axis.title.y.right = element_text(color = 'royalblue', size=13),
    axis.text.y = element_text(color = 'gold', size=13),
    axis.text.y.right = element_text(color = 'royalblue', size=13)
  )